####################################################################
#***2019 analysis
## Packages
# To install and open the R packages that you need for this code. 

need <- c('tidyverse','readstata13','lfe','glue', 'stargazer','arm', 'broom', 'ggplot2', 'dotwhisker', 'gridExtra', 'fixest', 'rdrobust')
have <- need %in% rownames(installed.packages()) 
if(any(!have)) install.packages(need[!have]) 
invisible(lapply(need, library, character.only=T)) 

# old version of lfe:
#devtools::install_version("lfe", version = "2.8-5.1", repos = "http://cran.us.r-project.org")

# Working-directory setup. Change this section to wherever you keep the
# replication files. The script locates itself through the RStudio API (so it
# has to be run from the RStudio editor), moves into its own folder, clears the
# workspace, and then steps up one directory. Every relative path used below
# is resolved against that parent directory.
script_folder <- dirname(rstudioapi::getSourceEditorContext()$path)
setwd(glue("{script_folder}"))
rm(list = ls())  # wipes the workspace, including `need`, `have`, `script_folder`
setwd("../")

## Load the RD data (the code below presumably gets `rd.data` from this file).
load("5prepdata/final_rd_data.RData")

##################################################################
# Main effects
##################################################################

# Interact won x delegation size / number of seats in Congress.
#
# state.del.size is the largest district.x value observed within each state.x.
# The grouping is dropped again right after the mutate so that rd.data is a
# plain (ungrouped) table for everything that follows; leaving it grouped makes
# every later dplyr verb silently operate per state.
# Note: max(..., na.rm = TRUE) yields -Inf (with a warning) for a state whose
# district.x values are all missing.
rd.data <- rd.data %>%
  ungroup() %>%
  group_by(state.x) %>%
  mutate(state.del.size = max(district.x, na.rm = TRUE)) %>%
  ungroup()

# Share of seats: state delegation size relative to del.size
# (del.size is presumably supplied by the loaded data -- TODO confirm).
rd.data$del.size.prop <- rd.data$state.del.size / rd.data$del.size
# Interaction of that share with the win indicator.
rd.data$del.size.propXwon <- rd.data$del.size.prop * rd.data$won_election

# Main RD specification: kernel-weighted linear regression of an outcome on
# the win indicator, the running variable and its interaction with winning,
# candidate/election controls, and year + state fixed effects, with standard
# errors clustered by state.
#
# Args:
#   x: outcome vector with one entry per row of rd.data.
# Returns:
#   An unnamed list: [[1]] the felm fit on the within-bandwidth rows,
#   [[2]] the selected bandwidth rounded to two decimals.
#
# All changes to rd.data below happen on the function-local copy; the global
# rd.data is not modified.
#
# NOTE(review): `covs` is built with `+`, so rdbwselect() receives a single
# vector holding the element-wise sum of the covariates rather than a matrix
# of separate covariates (cbind). Kept as is because changing it would change
# the selected bandwidth and all estimates -- confirm what was intended.
main.specification <- function(x) {
  # Start from a clean slate for the three helper columns.
  rd.data$cutoff <- NULL
  rd.data$bandwidth <- NULL
  rd.data$kw <- NULL

  # Bandwidth selection (local linear, triangular kernel, "mserd" selector).
  bw.fit <- rdbwselect(
    x, rd.data$victory_marg,
    p = 1, c = 0, kernel = "tri", bwselect = "mserd",
    covs = rd.data$running_terms_served + rd.data$dem + rd.data$rep +
      rd.data$spc_elec + rd.data$term_length + rd.data$number_candidates +
      rd.data$year
  )
  rd.data$bandwidth <- bw.fit$bws[1]
  rm(bw.fit)

  # Triangular kernel weights around the cutoff at zero:
  # 1 - |cutoff - margin| / bandwidth inside the window, 0 outside it.
  rd.data$cutoff <- 0
  kw <- 1 - abs(rd.data$cutoff - rd.data$victory_marg) / rd.data$bandwidth
  outside <- rd.data$victory_marg > (rd.data$cutoff + rd.data$bandwidth) |
    rd.data$victory_marg < (rd.data$cutoff - rd.data$bandwidth)
  kw[outside] <- 0
  rd.data$kw <- kw

  # Weighted regression on observations with positive kernel weight.
  # felm formula parts: covariates | fixed effects | no IV | cluster variable.
  out <- felm(
    x[rd.data$kw>0] ~ won_election + victory_marg + f_x_win +
      running_terms_served + dem + rep + spc_elec + term_length +
      number_candidates | year + state | 0 | state,
    data = rd.data[rd.data$kw>0,],
    weights = rd.data$kw[rd.data$kw>0]
  )
  #out <- feols(ever_ranprim_h ~ won_election + victory_marg + f_x_win + running_terms_served + dem + rep + spc_elec + term_length + number_candidates| year + state, weights = rd.data$kw[rd.data$kw>0], cluster="state",  data = rd.data %>% filter(kw > 0))

  list(out, round(rd.data$bandwidth[1], 2))
}

# RD specification without candidate/election controls: kernel-weighted linear
# regression of an outcome on the win indicator, the running variable and
# their interaction, with year fixed effects only and standard errors
# clustered by state. The bandwidth is selected with `year` as the only
# covariate.
#
# Args:
#   x: outcome vector with one entry per row of rd.data.
# Returns:
#   An unnamed list: [[1]] the felm fit on the within-bandwidth rows,
#   [[2]] the selected bandwidth rounded to two decimals.
#
# All changes to rd.data below happen on the function-local copy; the global
# rd.data is not modified.
main.specification.nocontrols <- function(x) {
  # Start from a clean slate for the three helper columns.
  rd.data$cutoff <- NULL
  rd.data$bandwidth <- NULL
  rd.data$kw <- NULL

  # Bandwidth selection (local linear, triangular kernel, "mserd" selector).
  bw.fit <- rdbwselect(
    x, rd.data$victory_marg,
    p = 1, c = 0, kernel = "tri", bwselect = "mserd",
    covs = rd.data$year
  )
  rd.data$bandwidth <- bw.fit$bws[1]
  rm(bw.fit)

  # Triangular kernel weights around the cutoff at zero:
  # 1 - |cutoff - margin| / bandwidth inside the window, 0 outside it.
  rd.data$cutoff <- 0
  kw <- 1 - abs(rd.data$cutoff - rd.data$victory_marg) / rd.data$bandwidth
  outside <- rd.data$victory_marg > (rd.data$cutoff + rd.data$bandwidth) |
    rd.data$victory_marg < (rd.data$cutoff - rd.data$bandwidth)
  kw[outside] <- 0
  rd.data$kw <- kw

  # felm formula parts: covariates | fixed effects | no IV | cluster variable.
  out <- felm(
    x[rd.data$kw>0] ~ won_election + victory_marg + f_x_win | year | 0 | state,
    data = rd.data[rd.data$kw>0,],
    weights = rd.data$kw[rd.data$kw>0]
  )

  list(out, round(rd.data$bandwidth[1], 2))
}

# Main RD specification restricted to one state-legislative chamber.
#
# Args:
#   x: outcome vector already subset to the rows with chamber == chamber.num
#      (it must line up row-for-row with the filtered data).
#   chamber.num: value of rd.data$chamber to keep.
# Returns:
#   An unnamed list: [[1]] the felm fit on the within-bandwidth rows,
#   [[2]] the selected bandwidth rounded to two decimals.
#
# The filtering and all column changes happen on the function-local copy of
# rd.data; the global rd.data is not modified.
#
# NOTE(review): `covs` is built with `+`, so rdbwselect() receives a single
# vector holding the element-wise sum of the covariates rather than a matrix
# of separate covariates (cbind). Kept as is because changing it would change
# the selected bandwidth and all estimates -- confirm what was intended.
main.specification.chamber <- function(x, chamber.num) {
  # Keep only the requested chamber, then reset the helper columns.
  rd.data <- filter(rd.data, chamber == chamber.num)
  rd.data$cutoff <- NULL
  rd.data$bandwidth <- NULL
  rd.data$kw <- NULL

  # Bandwidth selection (local linear, triangular kernel, "mserd" selector).
  bw.fit <- rdbwselect(
    x, rd.data$victory_marg,
    p = 1, c = 0, kernel = "tri", bwselect = "mserd",
    covs = rd.data$running_terms_served + rd.data$dem + rd.data$rep +
      rd.data$spc_elec + rd.data$term_length + rd.data$number_candidates +
      rd.data$year
  )
  rd.data$bandwidth <- bw.fit$bws[1]
  rm(bw.fit)

  # Triangular kernel weights around the cutoff at zero:
  # 1 - |cutoff - margin| / bandwidth inside the window, 0 outside it.
  rd.data$cutoff <- 0
  kw <- 1 - abs(rd.data$cutoff - rd.data$victory_marg) / rd.data$bandwidth
  outside <- rd.data$victory_marg > (rd.data$cutoff + rd.data$bandwidth) |
    rd.data$victory_marg < (rd.data$cutoff - rd.data$bandwidth)
  kw[outside] <- 0
  rd.data$kw <- kw

  # felm formula parts: covariates | fixed effects | no IV | cluster variable.
  out <- felm(
    x[rd.data$kw>0] ~ won_election + victory_marg + f_x_win +
      running_terms_served + dem + rep + spc_elec + term_length +
      number_candidates | year + state | 0 | state,
    data = rd.data[rd.data$kw>0,],
    weights = rd.data$kw[rd.data$kw>0]
  )

  list(out, round(rd.data$bandwidth[1], 2))
}


# Mean of an outcome among observations inside the RD bandwidth.
#
# The bandwidth is re-selected with the same rdbwselect() call used by
# main.specification(), and an observation counts as "inside" when its
# triangular kernel weight is strictly positive.
#
# Args:
#   x: outcome vector with one entry per row of rd.data.
# Returns:
#   The mean of x over the within-bandwidth rows, rounded to three decimals.
#   No na.rm is applied, so a missing value in that subset gives NA.
#
# NOTE(review): `covs` is built with `+`, so rdbwselect() receives a single
# vector holding the element-wise sum of the covariates rather than a matrix
# of separate covariates (cbind) -- confirm what was intended.
extract.mean <- function(x) {
  margin <- rd.data$victory_marg

  fit <- rdbwselect(
    x, margin,
    p = 1, c = 0, kernel = "tri", bwselect = "mserd",
    covs = rd.data$running_terms_served + rd.data$dem + rd.data$rep +
      rd.data$spc_elec + rd.data$term_length + rd.data$number_candidates +
      rd.data$year
  )
  bandwidth <- fit$bws[1]

  # Triangular kernel weight: 1 - |cutoff - margin| / bandwidth inside the
  # window around the cutoff, 0 outside it.
  cutoff <- 0
  kw <- 1 - abs(cutoff - margin) / bandwidth
  kw[margin > (cutoff + bandwidth) | margin < (cutoff - bandwidth)] <- 0

  round(mean(x[kw > 0]), 3)
}

# Standard deviation of an outcome among observations inside the RD bandwidth.
#
# The bandwidth is re-selected with the same rdbwselect() call used by
# main.specification(), and an observation counts as "inside" when its
# triangular kernel weight is strictly positive.
#
# Args:
#   x: outcome vector with one entry per row of rd.data.
# Returns:
#   sd() of x over the within-bandwidth rows, rounded to three decimals.
#   No na.rm is applied, so a missing value in that subset gives NA.
#
# NOTE(review): `covs` is built with `+`, so rdbwselect() receives a single
# vector holding the element-wise sum of the covariates rather than a matrix
# of separate covariates (cbind) -- confirm what was intended.
extract.sd <- function(x) {
  margin <- rd.data$victory_marg

  fit <- rdbwselect(
    x, margin,
    p = 1, c = 0, kernel = "tri", bwselect = "mserd",
    covs = rd.data$running_terms_served + rd.data$dem + rd.data$rep +
      rd.data$spc_elec + rd.data$term_length + rd.data$number_candidates +
      rd.data$year
  )
  bandwidth <- fit$bws[1]

  # Triangular kernel weight: 1 - |cutoff - margin| / bandwidth inside the
  # window around the cutoff, 0 outside it.
  cutoff <- 0
  kw <- 1 - abs(cutoff - margin) / bandwidth
  kw[margin > (cutoff + bandwidth) | margin < (cutoff - bandwidth)] <- 0

  round(sd(x[kw > 0]), 3)
}


#################################################################
# HOUSE
#################################################################

# Main specification, one fit per House outcome. Each m* is a two-element
# list: [[1]] the fitted model, [[2]] the rounded bandwidth.
m1 <- main.specification(rd.data$ever_ranprim_h)
m2 <- main.specification(rd.data$ever_winprim_h)
m3 <- main.specification(rd.data$ever_runhouse)
m4 <- main.specification(rd.data$ever_winhouse)

# Within-bandwidth mean and s.d. of each outcome, in the same column order.
means.all <- round(
  vapply(
    list(rd.data$ever_ranprim_h, rd.data$ever_winprim_h,
         rd.data$ever_runhouse, rd.data$ever_winhouse),
    extract.mean,
    numeric(1)
  ),
  3
)
sds.all <- round(
  vapply(
    list(rd.data$ever_ranprim_h, rd.data$ever_winprim_h,
         rd.data$ever_runhouse, rd.data$ever_winhouse),
    extract.sd,
    numeric(1)
  ),
  3
)

# Write the main results table; only the won_election coefficient is shown.
stargazer(
  list(m1[[1]], m2[[1]], m3[[1]], m4[[1]]),
  title = "Effect of state legislative service on career progression to Congressional candidacy and representation",
  style = "apsr",
  font.size = "scriptsize",
  model.numbers = TRUE,
  out = "7tex/manuscript/tables/sourcefiles/Table 3.tex",
  label = "tab:main_results",
  keep.stat = c("n", "rsq"),
  dep.var.labels = "Ever:",
  column.labels = c("Ran for House primary", "Won House primary", "Ran for House general", "Won House general"),
  add.lines = list(
    controls0a = c("Outcome mean", means.all),
    controls0b = c("Outcome s.d.", sds.all),
    c("Bandwidth", m1[[2]], m2[[2]], m3[[2]], m4[[2]])
  ),
  keep = c("won_election"),
  covariate.labels = c("Won state legis. seat"),
  notes = "\\parbox[t]{\\linewidth}{This table reports estimates of the effect of an additional state legislative term on individuals' career progression to Congress. The dependent variable is equal to one if the candidate ever runs in the election listed in the column header and is zero otherwise. The sample contains all first-time state legislative elections within the optimal bandwidth based on the \\cite{CCT} algorithm. All regressions include state and election year fixed effects, a linear term in the election margin as well as its interaction with the indicator for having won, and the full set of candidate and election controls. Estimations are triangular kernel-weighted.  Standard errors clustered by state are reported in parentheses.}" # *** = significant at 1 percent level, ** = significant at 5 percent level, * = significant at 10 percent level.
)




#################################################################
# HOUSE -- NO CONTROLS ROBUSTNESS
#################################################################



# Robustness: the four House outcomes re-estimated without candidate/election
# controls. main.specification.nocontrols() keeps only the win indicator, the
# running variable and their interaction, with year fixed effects and
# state-clustered standard errors. Each m* is a two-element list:
# [[1]] the fitted model, [[2]] the rounded bandwidth.
m1 <- main.specification.nocontrols(rd.data$ever_ranprim_h)
m2 <- main.specification.nocontrols(rd.data$ever_winprim_h)
m3 <- main.specification.nocontrols(rd.data$ever_runhouse)
m4 <- main.specification.nocontrols(rd.data$ever_winhouse)

# NOTE(review): extract.mean() / extract.sd() select their bandwidth with the
# covariate-based rdbwselect() call, not the year-only call used by
# main.specification.nocontrols(). The "Outcome mean" / "Outcome s.d." rows can
# therefore refer to a different bandwidth sample than the models in this
# table -- confirm whether that is intended.
means.all <- round(c(extract.mean(rd.data$ever_ranprim_h), extract.mean(rd.data$ever_winprim_h), extract.mean(rd.data$ever_runhouse), extract.mean(rd.data$ever_winhouse)), 3)
sds.all <- round(c(extract.sd(rd.data$ever_ranprim_h), extract.sd(rd.data$ever_winprim_h), extract.sd(rd.data$ever_runhouse), extract.sd(rd.data$ever_winhouse)), 3)

# The table note is corrected to describe the specification actually fitted
# here: year fixed effects only, no state fixed effects and no controls. The
# original text was copied from the main table and claimed both.
stargazer(list(m1[[1]], m2[[1]], m3[[1]], m4[[1]]),
          title = "Effect of state legislative service on career progression: No controls",
          style = "apsr",
          font.size = "scriptsize",
          model.numbers = TRUE,
          out = "7tex/manuscript/tables/sourcefiles/Appendix Table 18.tex",
          label = "tab:main_results_nocontrols",
          keep.stat = c("n", "rsq"),
          dep.var.labels = "Ever:",
          column.labels = c("Ran for House primary", "Won House primary", "Ran for House general", "Won House general"),
          add.lines = list(controls0a = c("Outcome mean", means.all),
                           controls0b = c("Outcome s.d.", sds.all),
                           c("Bandwidth", m1[[2]], m2[[2]], m3[[2]], m4[[2]])
          ),
          keep = c("won_election"),
          covariate.labels = c("Won state legis. seat"),
          notes = "\\parbox[t]{\\linewidth}{This table reports estimates of the effect of an additional state legislative term on individuals' career progression to Congress. The dependent variable is equal to one if the candidate ever runs in the election listed in the column header and is zero otherwise. The sample contains all first-time state legislative elections within the optimal bandwidth based on the \\cite{CCT} algorithm. All regressions include election year fixed effects and a linear term in the election margin as well as its interaction with the indicator for having won; no state fixed effects or candidate and election controls are included. Estimations are triangular kernel-weighted.  Standard errors clustered by state are reported in parentheses.}" # *** = significant at 1 percent level, ** = significant at 5 percent level, * = significant at 10 percent level.
)


#################################################################
# SENATE
#################################################################

# Main specification, one fit per Senate outcome. Each m* is a two-element
# list: [[1]] the fitted model, [[2]] the rounded bandwidth.
m1 <- main.specification(rd.data$ever_ranprim_s)
m2 <- main.specification(rd.data$ever_winprim_s)
m3 <- main.specification(rd.data$ever_runsenate)
m4 <- main.specification(rd.data$ever_winsenate)

# Within-bandwidth mean and s.d. of each outcome, in the same column order.
means.all <- round(
  vapply(
    list(rd.data$ever_ranprim_s, rd.data$ever_winprim_s,
         rd.data$ever_runsenate, rd.data$ever_winsenate),
    extract.mean,
    numeric(1)
  ),
  3
)
sds.all <- round(
  vapply(
    list(rd.data$ever_ranprim_s, rd.data$ever_winprim_s,
         rd.data$ever_runsenate, rd.data$ever_winsenate),
    extract.sd,
    numeric(1)
  ),
  3
)

# Write the Senate results table; only the won_election coefficient is shown.
stargazer(
  list(m1[[1]], m2[[1]], m3[[1]], m4[[1]]),
  title = "Effect of state legislative service on career progression to Senate candidacy and representation",
  style = "apsr",
  font.size = "scriptsize",
  model.numbers = TRUE,
  out = "7tex/manuscript/tables/sourcefiles/Appendix Table 5.tex",
  label = "tab:main_results_senate",
  keep.stat = c("n", "rsq"),
  dep.var.labels = "Ever:",
  column.labels = c("Ran for Senate primary", "Won Senate primary", "Ran for Senate general", "Won Senate general"),
  add.lines = list(
    controls0a = c("DV mean, bandwidth sample", means.all),
    controls0b = c("DV SD, bandwidth sample", sds.all),
    c("Bandwidth", m1[[2]], m2[[2]], m3[[2]], m4[[2]])
  ),
  keep = c("won_election"),
  covariate.labels = c("Won state legis. seat"),
  notes = "\\parbox[t]{\\linewidth}{This table reports estimates of the effect of an additional state legislative term on individuals' career progression to Congress. The dependent variable is equal to one if the candidate ever runs in the election listed in the column header and is zero otherwise. The sample contains all first-time state legislative elections within the optimal bandwidth based on the \\cite{CCT} algorithm. All regressions include state and election year fixed effects, a linear term in the election margin as well as its interaction with the indicator for having won, and the full set of candidate and election controls. Estimations are triangular kernel-weighted.  Standard errors clustered by state are reported in parentheses.}" # *** = significant at 1 percent level, ** = significant at 5 percent level, * = significant at 10 percent level.
)






##################################################################
# split by upper / lower house
##################################################################
# Chamber codes: chamber == 9 is the state house (lower chamber). The
# chamber == 8 fits feed the "Upper chamber" table, so 8 is presumably the
# state senate.
# Each outcome vector is subset to the chamber before being passed in, because
# main.specification.chamber() filters rd.data to that same chamber and the
# two have to line up row-for-row.

# Lower chamber (chamber == 9)
m1 <- main.specification.chamber(with(rd.data, ever_ranprim_h[chamber == 9]), 9)
m2 <- main.specification.chamber(with(rd.data, ever_winprim_h[chamber == 9]), 9)
m3 <- main.specification.chamber(with(rd.data, ever_runhouse[chamber == 9]), 9)
m4 <- main.specification.chamber(with(rd.data, ever_winhouse[chamber == 9]), 9)

# Upper chamber (chamber == 8)
m5 <- main.specification.chamber(with(rd.data, ever_ranprim_h[chamber == 8]), 8)
m6 <- main.specification.chamber(with(rd.data, ever_winprim_h[chamber == 8]), 8)
m7 <- main.specification.chamber(with(rd.data, ever_runhouse[chamber == 8]), 8)
m8 <- main.specification.chamber(with(rd.data, ever_winhouse[chamber == 8]), 8)

# Lower-chamber table: models m1-m4 (fitted on chamber == 9), reporting only
# the won_election coefficient plus the bandwidth used for each model.
stargazer(
  list(m1[[1]], m2[[1]], m3[[1]], m4[[1]]),
  title = "Effect of state legislative service on career progression to Congressional candidacy - Lower chamber",
  style = "apsr",
  font.size = "scriptsize",
  model.numbers = TRUE,
  out = "7tex/manuscript/tables/sourcefiles/Appendix Table 11.tex",
  label = "tab:main_results_lower",
  keep.stat = c("n", "rsq"),
  dep.var.labels = "Ever:",
  column.labels = c("Ran for House primary", "Won House primary", "Ran for House general", "Won House general"),
  keep = c("won_election"),
  add.lines = list(
    c("Bandwidth", m1[[2]], m2[[2]], m3[[2]], m4[[2]])
  ),
  covariate.labels = c("Won state legis. seat"),
  notes = "\\parbox[t]{\\linewidth}{This table reports estimates of the effect of an additional state legislative term on individuals' career progression to Congress. The sample is limited to individuals who are elected to their legislature's lower chamber only. The dependent variable is equal to one if the candidate ever runs in the election listed in the column header and is zero otherwise. The sample contains all first-time state legislative elections within the optimal bandwidth based on the \\cite{CCT} algorithm. All regressions include state and election year fixed effects, a linear term in the election margin as well as its interaction with the indicator for having won, and the full set of candidate and election controls. Estimations are triangular kernel-weighted.  Standard errors clustered by state are reported in parentheses.}" # *** = significant at 1 percent level, ** = significant at 5 percent level, * = significant at 10 percent level.
)

# Upper-chamber table: models m5-m8 (fitted on chamber == 8), reporting only
# the won_election coefficient plus the bandwidth used for each model.
# The table note now says "upper chamber"; the original text was copied from
# the lower-chamber table and described the wrong sample, contradicting this
# table's own title and label.
stargazer(list(m5[[1]], m6[[1]], m7[[1]], m8[[1]]),
          title = "Effect of state legislative service on career progression to Congressional candidacy - Upper chamber",
          style = "apsr",
          font.size = "scriptsize",
          model.numbers = TRUE,
          out = "7tex/manuscript/tables/sourcefiles/Appendix Table 10.tex",
          label = "tab:main_results_upper",
          keep.stat = c("n", "rsq"),
          dep.var.labels = "Ever:",
          column.labels = c("Ran for House primary", "Won House primary", "Ran for House general", "Won House general"),
          keep = c("won_election"),
          add.lines = list(c("Bandwidth", m5[[2]], m6[[2]], m7[[2]], m8[[2]])),
          covariate.labels = c("Won state legis. seat"),
          notes = "\\parbox[t]{\\linewidth}{This table reports estimates of the effect of an additional state legislative term on individuals' career progression to Congress. The sample is limited to individuals who are elected to their legislature's upper chamber only. The dependent variable is equal to one if the candidate ever runs in the election listed in the column header and is zero otherwise. The sample contains all first-time state legislative elections within the optimal bandwidth based on the \\cite{CCT} algorithm. All regressions include state and election year fixed effects, a linear term in the election margin as well as its interaction with the indicator for having won, and the full set of candidate and election controls. Estimations are triangular kernel-weighted.  Standard errors clustered by state are reported in parentheses.}" # *** = significant at 1 percent level, ** = significant at 5 percent level, * = significant at 10 percent level.
)



##################################################################
###### LOGITS
##################################################################

# Logit version of the main RD specification: kernel-weighted binomial glm of
# an outcome on the win indicator, the running variable and its interaction
# with winning, candidate/election controls, and year and state entered as
# factor dummies.
#
# Args:
#   x: outcome vector with one entry per row of rd.data.
# Returns:
#   The fitted glm object (the bandwidth is not returned).
#
# All changes to rd.data below happen on the function-local copy; the global
# rd.data is not modified. Standard errors are glm's defaults (no clustering
# is requested here). With fractional kernel weights, a binomial glm may warn
# about non-integer numbers of successes.
#
# NOTE(review): `covs` is built with `+`, so rdbwselect() receives a single
# vector holding the element-wise sum of the covariates rather than a matrix
# of separate covariates (cbind). Kept as is because changing it would change
# the selected bandwidth and all estimates -- confirm what was intended.
main.specification.logit <- function(x) {
  # Start from a clean slate for the three helper columns.
  rd.data$cutoff <- NULL
  rd.data$bandwidth <- NULL
  rd.data$kw <- NULL

  # Bandwidth selection (local linear, triangular kernel, "mserd" selector).
  bw.fit <- rdbwselect(
    x, rd.data$victory_marg,
    p = 1, c = 0, kernel = "tri", bwselect = "mserd",
    covs = rd.data$running_terms_served + rd.data$dem + rd.data$rep +
      rd.data$spc_elec + rd.data$term_length + rd.data$number_candidates +
      rd.data$year
  )
  rd.data$bandwidth <- bw.fit$bws[1]
  rm(bw.fit)

  # Triangular kernel weights around the cutoff at zero:
  # 1 - |cutoff - margin| / bandwidth inside the window, 0 outside it.
  rd.data$cutoff <- 0
  kw <- 1 - abs(rd.data$cutoff - rd.data$victory_marg) / rd.data$bandwidth
  outside <- rd.data$victory_marg > (rd.data$cutoff + rd.data$bandwidth) |
    rd.data$victory_marg < (rd.data$cutoff - rd.data$bandwidth)
  kw[outside] <- 0
  rd.data$kw <- kw

  # Weighted logit on observations with positive kernel weight.
  glm(
    x[rd.data$kw>0] ~ won_election + victory_marg + f_x_win +
      running_terms_served + dem + rep + spc_elec + term_length +
      number_candidates + as.factor(year) + as.factor(state),
    data = rd.data[rd.data$kw>0,],
    weights = rd.data$kw[rd.data$kw>0],
    family = "binomial"
  )
}

# Logit fits for the four House outcomes. Unlike the linear specifications,
# main.specification.logit() returns the model object directly, so m1-m4 are
# passed to stargazer as they are.
m1 <- main.specification.logit(rd.data$ever_ranprim_h)
m2 <- main.specification.logit(rd.data$ever_winprim_h)
m3 <- main.specification.logit(rd.data$ever_runhouse)
m4 <- main.specification.logit(rd.data$ever_winhouse)

# Write the logit table; only the won_election coefficient is shown.
stargazer(
  list(m1, m2, m3, m4),
  title = "Effect of state legislative service on career progression to Congressional candidacy (logit model)",
  style = "apsr",
  font.size = "scriptsize",
  model.numbers = TRUE,
  out = "7tex/manuscript/tables/sourcefiles/Appendix Table 6.tex",
  label = "tab:main_results_logit",
  keep.stat = c("n", "rsq"),
  dep.var.labels = "Ever:",
  column.labels = c("Ran for House primary", "Won House primary", "Ran for House general", "Won House general"),
  keep = c("won_election"),
  covariate.labels = c("Won state legis. seat"),
  notes = "\\parbox[t]{\\linewidth}{This table reports estimates of the effect of an additional state legislative term on individuals' career progression to Congress. The dependent variable is equal to one if the candidate ever runs in the election listed in the column header and is zero otherwise. The sample contains all first-time state legislative elections within the optimal bandwidth based on the \\cite{CCT} algorithm. All regressions include state and election year fixed effects, a linear term in the election margin as well as its interaction with the indicator for having won, and the full set of candidate and election controls. Estimations are triangular kernel-weighted.}" # *** = significant at 1 percent level, ** = significant at 5 percent level, * = significant at 10 percent level.
)



#save(rd.data, m1, m2, m3, m4, file = 'logitmodels.RData')



















